/*******************************************************************************
Project:		Wealth -- Smith, Zidar, and Zwick
Last modified: 	2021-06-15
Description:	This files builds inputs for a version of the interest rate CMD 
				exercise in which group 1 is the top 0.1% and group 2 is the 
				bottom 99.9%. It pulls data from SZ parameters, FRED, the SCF, and 
				collapses of tax data.
*******************************************************************************/
*Environment Variables

cd "~\cmd_replication_public\matlab"

*Dropbox folder
global dboxdirwlth " "
global litroot "$dboxdirwlth/lit"
global dboxstatadirwlth "$dboxdirwlth/stata"
global dtadir "$dboxstatadirwlth/dta"
global rawdir "$dboxstatadirwlth/raw"

global preferred_defn_early_v2 = 88
global preferred_defn_mid_v2 = 98
global preferred_defn_late_v2 = 112

/*******************************************************************************
Helper programs which can also be found in programs_final.do
*******************************************************************************/
capture program drop es_rank_scf/*%<*/
program define es_rank_scf
	syntax, rankvar(string) [othersplitvars(string) outname(string)]

	/***************************************************************************
		(1.0) Parse program inputs
	***************************************************************************/

	foreach varname in `rankvar' `othersplitvars' {
		capture confirm numeric variable `varname', exact
		if _rc > 0 {
			di as error "Variable `varname' not found; do not abbreviate variable names"
			exit 111
		}
	}

	foreach addtlvar in married year {
		capture confirm variable `addtlvar'
		if _rc > 0 {
			di as error "Need `addtlvar' variable"
			exit 111
		}
	}

	if regexm("`othersplitvars'", "`rankvar'") {
		di as error "Warning: ranking variable substring contained within othersplitvars"
		di as error "Ranking variable may be split twice"
	}

	/***************************************************************************
		(1.1) Make SCF rows into ``individuals:'' duplicate rows representing 
			married couples
	***************************************************************************/

	expand 2 if married == 1

	foreach splitvar of varlist `rankvar' `othersplitvars' {
		replace `splitvar' = `splitvar' / 2 if married == 1
	}

	cumul `rankvar' [aw = wgt], by(year) gen(rank)

	tempfile scf_indvls
	save `scf_indvls'

	/***************************************************************************
		(1.2) Load in counts of equal split individuals from SZ 2020 parameters 
			file
	***************************************************************************/

	import excel using $litroot/.../parameters.xlsx, firstrow clear

	keep if inrange(yr, 1989, 2019) & mod(yr - 1989, 3) == 0

	keep yr totadults20 
	rename yr year

	assert totadults20 < 1E6
	replace totadults20 = totadults20 * 1E3 

	sort year

	tempfile parameters
	save `parameters'

	/***************************************************************************
		(1.3) Merge parameters counts onto SCF data and adjust ranking variable
	***************************************************************************/

	use `scf_indvls', clear

	merge m:1 year using `parameters', assert(2 3) keep(3) nogen

    sort year
    egen num_ind = total(wgt), by(year) // Get total individuals according to SCF 

    /* For each observation, compute the percent of individuals in  
        the US (as represented by SCF) with wealth rank above that 
        observation */
    gen pct_ind_above = 1 - rank

    /* Now use this to compute number of households with wealth rank  
        above that observation */
    gen num_ind_above = pct_ind_above * num_ind

    /* Divide the number of HHs in US with greater rank by total  
        number of tax units in that year minus 400 TUs. */
    gen pct_ind_above_taxdata = num_ind_above / (totadults20 - 400)

    /* The rank of the observation among all tax units (excluding the 
        Forbes 400) is one minus the percent of non-Forbes 400 tax units 
        holding greater wealth than the observation */
    if "`outname'" == "" {
    	di "No output name specified; naming adjusted rank es_rank"
    	gen es_rank = 1 - pct_ind_above_taxdata
    }
    else {
    	gen `outname' = 1 - pct_ind_above_taxdata	
    }
    drop num_ind pct_ind_above num_ind_above pct_ind_above_taxdata totadults20 rank

end/*%>*/



/*******************************************************************************
	(0) Prepare interest rates and CPI adjustments data from FRED
*******************************************************************************/



freduse AAA BAA DGS10 DGS5 CPIAUCSL, clear

gen year = yofd(daten)

collapse (mean) CPIAUCSL r_aaa = AAA r_baa = BAA r_ust10 = DGS10 r_ust5 = DGS5, by(year)

qui keep if inrange(year, 1966, 2019)

foreach rate of varlist r_* {
	gen ln_`rate' = ln(`rate')
}

qui sort year
assert year[_N] == 2019

qui gen cpi_adjfactor = CPIAUCSL[_N] / CPIAUCSL
drop CPIAUCSL

tempfile fred
save `fred'

/*******************************************************************************
	(1) Prepare counts of equal-split individuals by year
*******************************************************************************/

import excel using $litroot/.../parameters.xlsx, ///
    firstrow clear

keep yr totadults20 ttinttaxw
qui keep if !missing(totadults20) & inrange(yr, 1966, 2019)

rename (yr totadults20) (year num_ind)

* Scale equal split individuals out of thousands
assert num_ind < 3E5 // Raw data are not the correct order of magnitude
replace num_ind = num_ind * 1E3

merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(3) nogen

qui replace ttinttaxw = ttinttaxw * cpi_adjfactor

* Fixed claims assets are in millions; scale into trillions
qui gen a_total_sz20 = ttinttaxw / 1E6
qui gen ln_a_total_sz20 = ln(a_total_sz20)

isid year
sort year

tempfile parameters
save `parameters'

/*******************************************************************************
	(2) Prepare SCF data
*******************************************************************************/

foreach rankvar in niw intinc agi nonintagi { // Cycle through ranking variables

	/***************************************************************************
		(2.1) Load data and drop redundant variables; rename for simplicity
	***************************************************************************/

	use $dtadir/scf_revision.dta, clear // Load microfile

	replace fixmutf = (0.5 * comutf) + gbmutf + obmutf + mmmf + (0.5 * omutf)
	
	* Keep only necessary variables
	keep year wgt networth_pref intinc agi fixmutf inttaxw married

	qui gen niw = networth_pref - inttaxw - fixmutf // Create non-interest wealth
	qui gen nonintagi = agi - intinc
	
	/***************************************************************************
		(2.2) Implement equal-split ranking
	***************************************************************************/

	ds year wgt `rankvar' married, not
	es_rank_scf, rankvar(`rankvar') othersplitvars("`r(varlist)'")

	qui gen `rankvar'_grp = cond(es_rank > 0.999, 1, 2) // Group into top 0.1/bot99.9

	/***************************************************************************
		(2.3) Collapse to yield sums of interest income and fixed claims by 	
			group, then clean up a bit
	***************************************************************************/

	foreach unweighted of varlist intinc inttaxw { // Weight variables to collapse
		qui replace `unweighted' = `unweighted' * wgt
	}

	collapse (sum) y = intinc a = inttaxw, by(year `rankvar'_grp)

	* Scale down into trillions
	qui replace y = y / 1E12
	qui replace a = a / 1E12 

	/***************************************************************************
		(2.4) Calculate interest rate
	***************************************************************************/

	qui gen r = (y / a) * 100 // Compute interest rate

	/***************************************************************************
		(2.5) Reshape data as wide and compute top group's share of fixed claims
	***************************************************************************/

	qui reshape wide y a r, i(year) j(`rankvar'_grp) // Reshape by group

	qui gen a_total_scf = a1 + a2
	qui gen sa1 = a1 / a_total // Compute top 1's share of fixed claims assets

	qui summ sa1 // Report top 1's average share of fixed claims assets over years
	qui egen mean_sa1 = mean(sa1) // Store avg in variables
	drop sa1

	/***************************************************************************
		(2.6) Merge on CPI adjustment factor and scale to 2019 dollars; also, 
			put variables of interest in logs
	***************************************************************************/

	qui merge 1:1 year using `fred', assert(2 3) keep(3) keepusing(cpi_adjfactor) nogen

	foreach dollarvar of varlist y1 y2 a1 a2 a_total_scf {
		qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	}
	drop cpi_adjfactor

	foreach logvar of varlist y1 y2 a1 a2 r1 r2 a_total_scf {
		gen ln_`logvar' = ln(`logvar')
	}

	/***************************************************************************
		(2.7) Clean up, sort, and save as tempfile
	***************************************************************************/

	isid year
	sort year

	tempfile scf`rankvar'
	qui save `scf`rankvar''
}

/*******************************************************************************
	(3) Prepare tax data
*******************************************************************************/

	/***************************************************************************
		(3.1) Rank by interest income (received from Drew 2021-02-04)
	***************************************************************************/

use $rawdir/.../fixinc_total_ranked_by_fixinc.dta, clear

isid year
rename fixinc? y?

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale down to trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

tempfile taxdata_fixincranks
save `taxdata_fixincranks'

	/***************************************************************************
		(3.2) Rank by non-interest wealth (this is hweal112 - taxbond_3tier 
			before 2001, hweal112 - taxbond_info afterwards)
	***************************************************************************/

forv year = 1966 / 2016 {

	local wlthdefn = cond(`year' < 1980, $preferred_defn_early_v2 + 100, ///
					 cond(`year' < 2001, $preferred_defn_mid_v2 + 100, $preferred_defn_late_v2 + 100))

	qui import delimited using $rawdir/.../wealth_scen`wlthdefn'_es_`year'.csv, clear

	keep group fiint intest taxbond
	qui gen year = `year'

	qui keep if inlist(group, "All", "P99.9-100")

	qui replace group = cond(group == "All", "_all", "_top01")

	qui gen fixinc = fiint + intest
	drop fiint intest

	qui reshape wide fixinc taxbond, i(year) j(group, string)

	qui gen fixinc_bot99pt9 = fixinc_all - fixinc_top01 

	qui gen r_bar = (fixinc_all / taxbond_all) * 100

	keep year fixinc_all fixinc_top01 fixinc_bot99pt9 taxbond_all r_bar

	rename (fixinc_all fixinc_top01 fixinc_bot99pt9 taxbond_all) (y_total y1 y2 a_total)

	qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

	foreach dollarvar of varlist y1 y2 y_total a_total {
		qui replace `dollarvar' = `dollarvar' / 1E12 // Scale down into trillions
		qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	}
	drop cpi_adjfactor

	foreach logvar of varlist y1 y2 a_total r_bar {
		qui gen ln_`logvar' = ln(`logvar')
	}

	tempfile nonintwealth`year'
	qui save `nonintwealth`year''
}

use `nonintwealth1966', clear
forv year = 1967 / 2016 {
	append using `nonintwealth`year''
}

tempfile taxdata_niwranks
save `taxdata_niwranks'

	/***************************************************************************
		(3.3) Rank by AGI
	***************************************************************************/

use $rawdir/.../fixinc_total_ranked_by_agi.dta, clear

gen consolidated_grp = cond(inlist(group, "P99.9-99.99", "P99.99-100"), 1, 2)

collapse (sum) y = fixinc, by(year consolidated_grp)

reshape wide y, i(year) j(consolidated_grp)

assert !missing(y1) & !missing(y2)

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale into trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

tempfile taxdata_agiranks
save `taxdata_agiranks'

	/***************************************************************************
		(3.4) Rank by non-interest AGI (received from Drew 2021-02-04)
	***************************************************************************/

use $rawdir/.../fixinc_total_ranked_by_agi_minus_fixinc.dta, clear

gen consolidated_grp = cond(group == "P99.9-100", 1, 2)

collapse (sum) y = fixinc, by(year consolidated_grp)

reshape wide y, i(year) j(consolidated_grp)

assert !missing(y1) & !missing(y2)

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale into trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

sort year

tempfile taxdata_nonintagiranks
save `taxdata_nonintagiranks'

/*******************************************************************************
	(4) Merge data together
*******************************************************************************/

	/***************************************************************************
		(4.1) Merge on collapses from tax data (incl DINA for the moment) and 
			rename flow concepts to keep track of what's ranked by what
	***************************************************************************/

use `taxdata_fixincranks', clear
rename *y? *y?_fixincrank

merge 1:1 year using `taxdata_agiranks', assert(3) nogen
rename *y? *y?_fiincrank

merge 1:1 year using `taxdata_niwranks', assert(3) nogen
rename *y? *y?_niwrank

merge 1:1 year using `taxdata_nonintagiranks', assert(3) nogen
rename *y? *y?_nonintagirank

	/***************************************************************************
		(4.2) Merge on parameters aggregates and compute and r bar with updated
			SZ 2020 aggregate fixed claims concepts
	***************************************************************************/

merge 1:1 year using `parameters', keepusing(a_total_sz20 ln_a_total_sz20) assert(2 3)
assert year > 2016 if _merge == 2
assert _merge == 3 if year < 2017
drop _merge

qui gen r_bar_sz20 = (y_total / a_total_sz20) * 100
qui gen ln_r_bar_sz20 = ln(r_bar_sz20)
drop y_total

	/***************************************************************************
		(4.3) Merge on credit and interest rate risk measures from FRED
	***************************************************************************/

merge 1:1 year using `fred', keepusing(r_* ln_r_*) assert(3) nogen

	/***************************************************************************
		(4.4) Merge on SCF collapses, renaming concepts so that we can keep 
			track of different ranks
	***************************************************************************/

foreach rankvar in niw intinc agi nonintagi { // Cycle through ranking variables

	merge 1:1 year using `scf`rankvar'', assert(1 3)
	assert _merge == 1 if !inrange(year, 1989, 2019) | mod(year - 1989, 3) > 0
	assert _merge == 3 if inrange(year, 1989, 2019) & mod(year - 1989, 3) == 0
	drop _merge

 	#delimit ;
	rename (*y? *a1 *a2 *r?) 
		(*y?_scf_`rankvar'rank *a1_scf_`rankvar'rank *a2_scf_`rankvar'rank *r?_scf_`rankvar'rank);
	#delimit cr
}

	/***************************************************************************
		(4.5) Clean up, sort, and save
	***************************************************************************/

qui ds year, not
order `r(varlist)', alphabetic last

isid year
sort year
export delimited using "~\cmd_replication_public\matlab\build\cmdinputs_top01bot99pt9_20210615.csv", replace